library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)


# Read the player statistics CSV (path is relative to the working directory)
all_stats_df <- read_csv(file = "../data/Player_Stats.csv")
## Rows: 563 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Team_Alias, Player_Name, Position
## dbl (11): Minutes, PPG, APG, DefRBD, OffRBD, SPG, BPG, Paint_PPG, att_3PT_pg...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the rows whose Minutes value is at least 528
Player_data <- dplyr::filter(all_stats_df, Minutes >= 528)

# Preview the first rows of the filtered table
head(Player_data)
## # A tibble: 6 × 14
##   Team_Alias Player_Name Minutes   PPG   APG DefRBD OffRBD   SPG   BPG Paint_PPG
##   <chr>      <chr>         <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>     <dbl>
## 1 MIN        Kyle Ander…    1782   6.4   4.2   2.7    0.76  0.9   0.59      4.28
## 2 MIN        Rudy Gobert    2593  14     1.3   9.17   3.75  0.68  2.13     10.6 
## 3 MIN        Mike Conley    2193  11.4   5.9   2.38   0.49  1.16  0.22      2.63
## 4 MIN        Naz Reid       1964  13.5   1.3   4.32   0.9   0.78  0.9       5.68
## 5 MIN        Jaden McDa…    2105  10.5   1.4   2.33   0.78  0.88  0.58      5.36
## 6 MIN        Karl-Antho…    2026  21.8   3     6.79   1.52  0.69  0.66      9.9 
## # ℹ 4 more variables: att_3PT_pg <dbl>, made_3PT_pg <dbl>, ATO <dbl>,
## #   Position <chr>
# Print a compact per-column summary of Player_data before any type changes
utils::str(Player_data)
## spc_tbl_ [300 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Team_Alias : chr [1:300] "MIN" "MIN" "MIN" "MIN" ...
##  $ Player_Name: chr [1:300] "Kyle Anderson" "Rudy Gobert" "Mike Conley" "Naz Reid" ...
##  $ Minutes    : num [1:300] 1782 2593 2193 1964 2105 ...
##  $ PPG        : num [1:300] 6.4 14 11.4 13.5 10.5 21.8 3.5 25.9 8 3.3 ...
##  $ APG        : num [1:300] 4.2 1.3 5.9 1.3 1.4 3 2 5.1 2.5 1 ...
##  $ DefRBD     : num [1:300] 2.7 9.17 2.38 4.32 2.33 6.79 0.96 4.78 1.61 2.06 ...
##  $ OffRBD     : num [1:300] 0.76 3.75 0.49 0.9 0.78 1.52 0.32 0.66 0.43 1.49 ...
##  $ SPG        : num [1:300] 0.9 0.68 1.16 0.78 0.88 0.69 0.63 1.28 0.78 0.24 ...
##  $ BPG        : num [1:300] 0.59 2.13 0.22 0.9 0.58 0.66 0.13 0.53 0.51 0.29 ...
##  $ Paint_PPG  : num [1:300] 4.28 10.61 2.63 5.68 5.36 ...
##  $ att_3PT_pg : num [1:300] 0.61 0.04 5.33 5.04 3.54 5.27 1.59 6.73 4.09 0.02 ...
##  $ made_3PT_pg: num [1:300] 0.14 0 2.36 2.09 1.19 2.19 0.75 2.41 1.6 0 ...
##  $ ATO        : num [1:300] 3.64 0.86 4.4 0.95 1.16 1.07 5.79 1.68 2.68 1.76 ...
##  $ Position   : chr [1:300] "F" "C" "G" "C" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Team_Alias = col_character(),
##   ..   Player_Name = col_character(),
##   ..   Minutes = col_double(),
##   ..   PPG = col_double(),
##   ..   APG = col_double(),
##   ..   DefRBD = col_double(),
##   ..   OffRBD = col_double(),
##   ..   SPG = col_double(),
##   ..   BPG = col_double(),
##   ..   Paint_PPG = col_double(),
##   ..   att_3PT_pg = col_double(),
##   ..   made_3PT_pg = col_double(),
##   ..   ATO = col_double(),
##   ..   Position = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Columns holding labels (to become factors) and columns holding
# measurements (to be numeric)
factor_columns <- c("Team_Alias", "Player_Name", "Position")
numeric_columns <- c(
  "Minutes", "PPG", "APG", "DefRBD", "OffRBD", "SPG", "BPG",
  "Paint_PPG", "att_3PT_pg", "made_3PT_pg", "ATO"
)

# Convert every label column to a factor in one assignment
Player_data[factor_columns] <- lapply(Player_data[factor_columns], as.factor)

# Coerce every measurement column to numeric in one assignment
Player_data[numeric_columns] <- lapply(Player_data[numeric_columns], as.numeric)

# Confirm the resulting column types
str(Player_data)
## spc_tbl_ [300 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Team_Alias : Factor w/ 25 levels "ATL","BOS","CHA",..: 15 15 15 15 15 15 15 15 15 5 ...
##  $ Player_Name: Factor w/ 292 levels "Aaron Gordon",..: 187 247 214 220 120 168 154 15 223 277 ...
##  $ Minutes    : num [1:300] 1782 2593 2193 1964 2105 ...
##  $ PPG        : num [1:300] 6.4 14 11.4 13.5 10.5 21.8 3.5 25.9 8 3.3 ...
##  $ APG        : num [1:300] 4.2 1.3 5.9 1.3 1.4 3 2 5.1 2.5 1 ...
##  $ DefRBD     : num [1:300] 2.7 9.17 2.38 4.32 2.33 6.79 0.96 4.78 1.61 2.06 ...
##  $ OffRBD     : num [1:300] 0.76 3.75 0.49 0.9 0.78 1.52 0.32 0.66 0.43 1.49 ...
##  $ SPG        : num [1:300] 0.9 0.68 1.16 0.78 0.88 0.69 0.63 1.28 0.78 0.24 ...
##  $ BPG        : num [1:300] 0.59 2.13 0.22 0.9 0.58 0.66 0.13 0.53 0.51 0.29 ...
##  $ Paint_PPG  : num [1:300] 4.28 10.61 2.63 5.68 5.36 ...
##  $ att_3PT_pg : num [1:300] 0.61 0.04 5.33 5.04 3.54 5.27 1.59 6.73 4.09 0.02 ...
##  $ made_3PT_pg: num [1:300] 0.14 0 2.36 2.09 1.19 2.19 0.75 2.41 1.6 0 ...
##  $ ATO        : num [1:300] 3.64 0.86 4.4 0.95 1.16 1.07 5.79 1.68 2.68 1.76 ...
##  $ Position   : Factor w/ 7 levels "C","C-F","F",..: 3 1 6 1 3 2 6 6 6 3 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Team_Alias = col_character(),
##   ..   Player_Name = col_character(),
##   ..   Minutes = col_double(),
##   ..   PPG = col_double(),
##   ..   APG = col_double(),
##   ..   DefRBD = col_double(),
##   ..   OffRBD = col_double(),
##   ..   SPG = col_double(),
##   ..   BPG = col_double(),
##   ..   Paint_PPG = col_double(),
##   ..   att_3PT_pg = col_double(),
##   ..   made_3PT_pg = col_double(),
##   ..   ATO = col_double(),
##   ..   Position = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Drop the identifier columns (Player_Name, Team_Alias) so that only the
# remaining columns feed into the clustering
Player_data_without_names <- Player_data[
  ,
  base::setdiff(names(Player_data), c("Player_Name", "Team_Alias"))
]

# Compute pairwise Gower dissimilarities between the remaining player rows
library(cluster)
dissimilarity_matrix <- daisy(x = Player_data_without_names, metric = "gower")
library(cluster)
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Project the dissimilarities onto two coordinates with classical MDS
mds_result <- cmdscale(dissimilarity_matrix, k = 2)
mds_data <- setNames(as.data.frame(mds_result), c("MDS1", "MDS2"))
mds_data[["Player_Name"]] <- Player_data[["Player_Name"]]

# Cluster the same dissimilarities hierarchically and cut the tree into
# five groups; store the group id of each player as a factor
clusters <- dissimilarity_matrix %>%
  hclust() %>%
  cutree(k = 5)
mds_data[["cluster"]] <- as.factor(clusters)

# Players placed in the "Superstar Big Men" group by hand, regardless of the
# cluster id they were given
superstar_big_men <- c(
  "Nikola Jokić",
  "Victor Wembanyama",
  "Domantas Sabonis",
  "Anthony Davis",
  "Giannis Antetokounmpo"
)

# Display label for each cluster id
cluster_names <- c(
  "1" = "Role-Playing Forwards",
  "2" = "Defensive Centers",
  "3" = "Playmaking Guards",
  "4" = "Limited-Minute Guards",
  "5" = "Elite All-Around Forwards"
)

# Label every player in a single pass: hand-picked players get the manual
# label, everyone else gets the label looked up from their cluster id
mds_data$cluster_name <- ifelse(
  mds_data$Player_Name %in% superstar_big_men,
  "Superstar Big Men",
  cluster_names[as.character(clusters)]
)

# Scatter the players on their two MDS coordinates, coloured by cluster label
interactive_plot <- plot_ly(
  data = mds_data,
  x = ~MDS1,
  y = ~MDS2,
  color = ~cluster_name,
  # Six colours: the five named clusters plus "Superstar Big Men"
  colors = c("blue", "purple", "black", "orange", "red", "green"),
  # Text attached to each point: the player's name and cluster label
  text = ~paste(
    "Player Name:", Player_Name,
    "<br>Cluster:", cluster_name
  ),
  type = "scatter",
  mode = "markers",
  marker = list(size = 8, opacity = 0.8)
)

# Add the plot title, axis titles and legend heading
interactive_plot <- plotly::layout(
  interactive_plot,
  title = "Interactive MDS Plot of Players by Cluster",
  xaxis = list(title = "MDS Dimension 1"),
  yaxis = list(title = "MDS Dimension 2"),
  legend = list(title = list(text = "Cluster Names"))
)

# Display the finished plot
interactive_plot